#!/usr/bin/python
# -*- coding: gbk -*-
import time

from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from slyy.items import SlyyItem


class SlyySpider(BaseSpider):
    """Crawl a chain of 163.com blog posts, yielding one SlyyItem per page.

    Starting from a single post URL, parse() extracts the post heading and
    then follows the top navigation link(s) to further posts, scheduling
    itself as the callback for each — walking the blog post-by-post.
    """

    name = "slyy2"
    allowed_domains = ["txw1958.blog.163.com"]
    start_urls = ["http://txw1958.blog.163.com/blog/static/188725046201262492446552/"]

    def parse(self, response):
        """Yield an item for this page, then Requests for linked pages."""
        selector = HtmlXPathSelector(response)

        # Post heading: join all matched text nodes into one unicode string.
        # NOTE(review): the deep positional XPath is tied to the blog's exact
        # markup — presumably fragile if the page template changes.
        heading_parts = selector.select('//*[@id="-3"]/div[2]/div[1]/div/div[2]/div/div[2]/div[1]/div[1]/div/div/h3/span[1]/text()').extract()
        yield SlyyItem(head="".join(heading_parts), url=response.url)

        # Follow the navigation link(s) to the next blog entry; each linked
        # page is parsed by this same method.
        next_links = selector.select('//*[@id="$_divTopLink"]/div[1]/a/@href').extract()
        for link in next_links:
            yield Request(link, callback=self.parse)